#!/usr/bin/env python
# -*- encoding: utf-8 -*-
"""
@File    :   zh_website_content.py
@Contact :   291622538@qq.com

@Modify Time      @Author    @Version    @Description
------------      -------    --------    -----------
2020/12/3 10:04   fan        1.0         None
"""
from abc import ABC

from scrapy import Spider
from scrapy import Request
from scrapy.crawler import CrawlerProcess


def website_content_parse(response, **kwargs):
    """Print the page URL plus its <title> and meta description/keywords."""
    print(response.url)
    # getall() is the modern alias for extract(); each call returns a list of strings.
    title = response.xpath('//title/text()').getall()
    description = response.xpath('//meta[@name="description"]/@content').getall()
    keywords = response.xpath('//meta[@name="keywords"]/@content').getall()
    print(title)
    print(description)
    print(keywords)
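    # A hedged sketch (not in the original code): rather than printing, the
    # callback could yield an item dict for an ITEM_PIPELINES entry such as
    # the SaveIpInfoPipeline referenced below, e.g.:
    #     yield {"url": response.url, "title": title,
    #            "description": description, "keywords": keywords}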


def save_overtime(response, **kwargs):
    """Placeholder callback (currently unused) that simply prints the response."""
    print(response)


class ZhWebsiteContent(Spider, ABC):
    # Name Scrapy uses to identify this spider
    name = "zh_website_content_spider"

    # Uncomment to route scraped items through a project item pipeline:
    # custom_settings = {
    #     'ITEM_PIPELINES': {'fan_project.pipelines.SaveIpInfoPipeline': 300},
    # }

    def start_requests(self):
        # Seed the crawl with a single URL and hand the response to the
        # module-level website_content_parse callback. dont_filter=False
        # keeps Scrapy's default duplicate-request filtering.
        url = "http://garrettcounty.org"
        yield Request(url, callback=website_content_parse, dont_filter=False)


if __name__ == '__main__':
    # Run the spider standalone, without the `scrapy crawl` command.
    process = CrawlerProcess()
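    # A minimal sketch (not in the original): CrawlerProcess also accepts a
    # settings dict, e.g. CrawlerProcess(settings={"LOG_LEVEL": "INFO"}),
    # to quiet the default DEBUG logging; LOG_LEVEL is a standard Scrapy setting.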
    process.crawl(ZhWebsiteContent)
    process.start()
